import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import psycopg2
import psycopg2.extras as extras
from pwdAs import passwd
df = pd.read_csv('listings.csv')
df.head(1)
| id | listing_url | scrape_id | last_scraped | source | name | description | neighborhood_overview | picture_url | host_id | ... | review_scores_communication | review_scores_location | review_scores_value | license | instant_bookable | calculated_host_listings_count | calculated_host_listings_count_entire_homes | calculated_host_listings_count_private_rooms | calculated_host_listings_count_shared_rooms | reviews_per_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5396 | https://www.airbnb.com/rooms/5396 | 20230313171257 | 2023-03-14 | city scrape | Explore the heart of old Paris | Cozy, well-appointed and graciously designed s... | You are within walking distance to the Louvre,... | https://a0.muscache.com/pictures/52413/f9bf76f... | 7903 | ... | 4.83 | 4.95 | 4.54 | 7510402838018 | f | 1 | 1 | 0 | 0 | 1.99 |
1 rows × 75 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 56726 entries, 0 to 56725 Data columns (total 75 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 56726 non-null int64 1 listing_url 56726 non-null object 2 scrape_id 56726 non-null int64 3 last_scraped 56726 non-null object 4 source 56726 non-null object 5 name 56689 non-null object 6 description 56161 non-null object 7 neighborhood_overview 32447 non-null object 8 picture_url 56725 non-null object 9 host_id 56726 non-null int64 10 host_url 56726 non-null object 11 host_name 56708 non-null object 12 host_since 56708 non-null object 13 host_location 46634 non-null object 14 host_about 26865 non-null object 15 host_response_time 31149 non-null object 16 host_response_rate 31149 non-null object 17 host_acceptance_rate 35410 non-null object 18 host_is_superhost 56686 non-null object 19 host_thumbnail_url 56708 non-null object 20 host_picture_url 56708 non-null object 21 host_neighbourhood 32895 non-null object 22 host_listings_count 56708 non-null float64 23 host_total_listings_count 56708 non-null float64 24 host_verifications 56708 non-null object 25 host_has_profile_pic 56708 non-null object 26 host_identity_verified 56708 non-null object 27 neighbourhood 32447 non-null object 28 neighbourhood_cleansed 56726 non-null object 29 neighbourhood_group_cleansed 0 non-null float64 30 latitude 56726 non-null float64 31 longitude 56726 non-null float64 32 property_type 56726 non-null object 33 room_type 56726 non-null object 34 accommodates 56726 non-null int64 35 bathrooms 0 non-null float64 36 bathrooms_text 56611 non-null object 37 bedrooms 47631 non-null float64 38 beds 55973 non-null float64 39 amenities 56726 non-null object 40 price 56726 non-null object 41 minimum_nights 56726 non-null int64 42 maximum_nights 56726 non-null int64 43 minimum_minimum_nights 56722 non-null float64 44 maximum_minimum_nights 56722 non-null float64 45 minimum_maximum_nights 56722 non-null float64 46 maximum_maximum_nights 56722 non-null float64 47 minimum_nights_avg_ntm 56722 non-null float64 48 maximum_nights_avg_ntm 56722 non-null float64 49 calendar_updated 0 non-null float64 50 has_availability 56726 non-null object 51 availability_30 56726 non-null int64 52 availability_60 56726 non-null int64 53 availability_90 56726 non-null int64 54 availability_365 56726 non-null int64 55 calendar_last_scraped 56726 non-null object 56 number_of_reviews 56726 non-null int64 57 number_of_reviews_ltm 56726 non-null int64 58 number_of_reviews_l30d 56726 non-null int64 59 first_review 45631 non-null object 60 last_review 45631 non-null object 61 review_scores_rating 45631 non-null float64 62 review_scores_accuracy 45108 non-null float64 63 review_scores_cleanliness 45112 non-null float64 64 review_scores_checkin 45099 non-null float64 65 review_scores_communication 45109 non-null float64 66 review_scores_location 45097 non-null float64 67 review_scores_value 45095 non-null float64 68 license 38063 non-null object 69 instant_bookable 56726 non-null object 70 calculated_host_listings_count 56726 non-null int64 71 calculated_host_listings_count_entire_homes 56726 non-null int64 72 calculated_host_listings_count_private_rooms 56726 non-null int64 73 calculated_host_listings_count_shared_rooms 56726 non-null int64 74 reviews_per_month 45631 non-null float64 dtypes: float64(23), int64(17), object(35) memory usage: 32.5+ MB
df.describe()
| id | scrape_id | host_id | host_listings_count | host_total_listings_count | neighbourhood_group_cleansed | latitude | longitude | accommodates | bathrooms | ... | review_scores_cleanliness | review_scores_checkin | review_scores_communication | review_scores_location | review_scores_value | calculated_host_listings_count | calculated_host_listings_count_entire_homes | calculated_host_listings_count_private_rooms | calculated_host_listings_count_shared_rooms | reviews_per_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 5.672600e+04 | 5.672600e+04 | 5.672600e+04 | 56708.000000 | 56708.000000 | 0.0 | 56726.000000 | 56726.000000 | 56726.000000 | 0.0 | ... | 45112.000000 | 45099.000000 | 45109.000000 | 45097.000000 | 45095.000000 | 56726.000000 | 56726.000000 | 56726.000000 | 56726.000000 | 45631.000000 |
| mean | 1.973188e+17 | 2.023031e+13 | 1.295704e+08 | 21.893948 | 32.803238 | NaN | 48.864290 | 2.344470 | 3.048919 | NaN | ... | 4.589212 | 4.786115 | 4.800066 | 4.799382 | 4.590021 | 15.104643 | 14.330748 | 0.622307 | 0.015742 | 1.009644 |
| std | 3.209526e+17 | 0.000000e+00 | 1.547738e+08 | 93.175233 | 143.030468 | NaN | 0.018055 | 0.033243 | 1.611676 | NaN | ... | 0.519631 | 0.401047 | 0.396593 | 0.344041 | 0.470871 | 51.584652 | 51.453338 | 4.438866 | 0.263379 | 1.362947 |
| min | 5.396000e+03 | 2.023031e+13 | 2.750000e+02 | 1.000000 | 1.000000 | NaN | 48.816080 | 2.235490 | 0.000000 | NaN | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.010000 |
| 25% | 1.629361e+07 | 2.023031e+13 | 1.537846e+07 | 1.000000 | 1.000000 | NaN | 48.851310 | 2.323630 | 2.000000 | NaN | ... | 4.440000 | 4.750000 | 4.750000 | 4.750000 | 4.470000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.160000 |
| 50% | 3.663390e+07 | 2.023031e+13 | 4.925020e+07 | 1.000000 | 2.000000 | NaN | 48.865590 | 2.347680 | 2.000000 | NaN | ... | 4.740000 | 4.910000 | 4.940000 | 4.900000 | 4.700000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.520000 |
| 75% | 5.978393e+17 | 2.023031e+13 | 2.125869e+08 | 3.000000 | 4.000000 | NaN | 48.878690 | 2.368830 | 4.000000 | NaN | ... | 4.950000 | 5.000000 | 5.000000 | 5.000000 | 4.880000 | 2.000000 | 1.000000 | 0.000000 | 0.000000 | 1.330000 |
| max | 8.463336e+17 | 2.023031e+13 | 5.049987e+08 | 2139.000000 | 3629.000000 | NaN | 48.901670 | 2.467120 | 16.000000 | NaN | ... | 5.000000 | 5.000000 | 5.000000 | 5.000000 | 5.000000 | 447.000000 | 447.000000 | 91.000000 | 10.000000 | 49.550000 |
8 rows × 40 columns
df.columns
Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
'description', 'neighborhood_overview', 'picture_url', 'host_id',
'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
'host_response_time', 'host_response_rate', 'host_acceptance_rate',
'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
'host_neighbourhood', 'host_listings_count',
'host_total_listings_count', 'host_verifications',
'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
'maximum_minimum_nights', 'minimum_maximum_nights',
'maximum_maximum_nights', 'minimum_nights_avg_ntm',
'maximum_nights_avg_ntm', 'calendar_updated', 'has_availability',
'availability_30', 'availability_60', 'availability_90',
'availability_365', 'calendar_last_scraped', 'number_of_reviews',
'number_of_reviews_ltm', 'number_of_reviews_l30d', 'first_review',
'last_review', 'review_scores_rating', 'review_scores_accuracy',
'review_scores_cleanliness', 'review_scores_checkin',
'review_scores_communication', 'review_scores_location',
'review_scores_value', 'license', 'instant_bookable',
'calculated_host_listings_count',
'calculated_host_listings_count_entire_homes',
'calculated_host_listings_count_private_rooms',
'calculated_host_listings_count_shared_rooms', 'reviews_per_month'],
dtype='object')
df.shape
(56726, 75)
df1 = df[['id','listing_url','name','description','neighborhood_overview','host_id','host_url','host_name','neighbourhood_cleansed','latitude','longitude','room_type','price','minimum_nights','minimum_nights','availability_365','number_of_reviews','last_review','calculated_host_listings_count','reviews_per_month']].copy()
df1.head(1)
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | longitude | room_type | price | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5396 | https://www.airbnb.com/rooms/5396 | Explore the heart of old Paris | Cozy, well-appointed and graciously designed s... | You are within walking distance to the Louvre,... | 7903 | https://www.airbnb.com/users/show/7903 | Borzou | Hôtel-de-Ville | 48.85247 | 2.35835 | Entire home/apt | $124.00 | 15 | 15 | 184 | 332 | 2023-02-18 | 1 | 1.99 |
df1.isna().sum()
id 0 listing_url 0 name 37 description 565 neighborhood_overview 24279 host_id 0 host_url 0 host_name 18 neighbourhood_cleansed 0 latitude 0 longitude 0 room_type 0 price 0 minimum_nights 0 minimum_nights 0 availability_365 0 number_of_reviews 0 last_review 11095 calculated_host_listings_count 0 reviews_per_month 11095 dtype: int64
df1['last_review']
0 2023-02-18
1 2023-03-15
2 2015-09-14
3 NaN
4 2022-10-21
...
56721 NaN
56722 NaN
56723 NaN
56724 NaN
56725 NaN
Name: last_review, Length: 56726, dtype: object
df1['last_review'] = pd.to_datetime(df1['last_review'])
df1['last_review']
0 2023-02-18
1 2023-03-15
2 2015-09-14
3 NaT
4 2022-10-21
...
56721 NaT
56722 NaT
56723 NaT
56724 NaT
56725 NaT
Name: last_review, Length: 56726, dtype: datetime64[ns]
df1['last_review'] = df1['last_review'].fillna(df1['last_review'].max())
df1['last_review']
0 2023-02-18
1 2023-03-15
2 2015-09-14
3 2023-03-26
4 2022-10-21
...
56721 2023-03-26
56722 2023-03-26
56723 2023-03-26
56724 2023-03-26
56725 2023-03-26
Name: last_review, Length: 56726, dtype: datetime64[ns]
df1['reviews_per_month'] = df1['reviews_per_month'].fillna(0)
df1['reviews_per_month']
0 1.99
1 2.26
2 0.04
3 0.00
4 0.35
...
56721 0.00
56722 0.00
56723 0.00
56724 0.00
56725 0.00
Name: reviews_per_month, Length: 56726, dtype: float64
arraytofil = ['name', 'host_name', 'description', 'neighborhood_overview', 'neighbourhood_cleansed']
df1[arraytofil] = df1[arraytofil].fillna("")
df1.isna().sum()
id 0 listing_url 0 name 0 description 0 neighborhood_overview 0 host_id 0 host_url 0 host_name 0 neighbourhood_cleansed 0 latitude 0 longitude 0 room_type 0 price 0 minimum_nights 0 minimum_nights 0 availability_365 0 number_of_reviews 0 last_review 0 calculated_host_listings_count 0 reviews_per_month 0 dtype: int64
df1['price'] = df1['price'].str.replace('$', "")
df1['price']
0 124.00
1 130.00
2 140.00
3 75.00
4 160.00
...
56721 640.00
56722 538.00
56723 600.00
56724 181.00
56725 300.00
Name: price, Length: 56726, dtype: object
df1['reserved_365'] = (365 - df1['availability_365'])
df1.head(1)
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | ... | room_type | price | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | reserved_365 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5396 | https://www.airbnb.com/rooms/5396 | Explore the heart of old Paris | Cozy, well-appointed and graciously designed s... | You are within walking distance to the Louvre,... | 7903 | https://www.airbnb.com/users/show/7903 | Borzou | Hôtel-de-Ville | 48.85247 | ... | Entire home/apt | 124.00 | 15 | 15 | 184 | 332 | 2023-02-18 | 1 | 1.99 | 181 |
1 rows × 21 columns
df1['host_id'].nunique()
42490
top_hosts = df1['host_id'].value_counts().nlargest(10)
top_hosts
host_id 314994947 447 50978178 230 50502817 214 7642792 196 440295601 195 33889201 182 458533553 165 26981054 156 402191311 141 6053288 131 Name: count, dtype: int64
top_host = df1.groupby(['host_id','host_name'])['host_id'].value_counts().nlargest(10)
top_host
host_id host_name 314994947 Blueground 447 50978178 Sebastien 230 50502817 Pierre De WeHost 214 7642792 Ludovic 196 440295601 Gaelle 195 33889201 Veeve 182 458533553 Joffrey 165 26981054 Cédric 156 402191311 GuestReady 141 6053288 Anna 131 Name: count, dtype: int64
top_hosts_df = df1[df1['host_id'].isin(top_hosts.index)]
top_hosts_df
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | ... | room_type | price | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | reserved_365 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 190 | 52931 | https://www.airbnb.com/rooms/52931 | Charming apt in the 17th of Paris - Terrace 25m2 | I will be delighted to welcome you to my charm... | In some streets, Les Épinettes has also become... | 50502817 | https://www.airbnb.com/users/show/50502817 | Pierre De WeHost | Batignolles-Monceau | 48.896130 | ... | Entire home/apt | 141.00 | 3 | 3 | 78 | 8 | 2023-02-24 | 214 | 1.79 | 287 |
| 662 | 810543 | https://www.airbnb.com/rooms/810543 | GuestReady - Spacious Bright flat near Louvre | This chic 1 bedroom apartment in the trendy 1s... | The apartment is perfectly located in the hear... | 402191311 | https://www.airbnb.com/users/show/402191311 | GuestReady | Louvre | 48.866310 | ... | Entire home/apt | 125.00 | 1 | 1 | 2 | 76 | 2023-01-07 | 141 | 0.64 | 363 |
| 966 | 1022819 | https://www.airbnb.com/rooms/1022819 | ✹Cocoon in the center of Montmartere✹ | The accommodation can accommodate two people w... | A very quiet place in the heart of a very busy... | 50502817 | https://www.airbnb.com/users/show/50502817 | Pierre De WeHost | Buttes-Montmartre | 48.887900 | ... | Entire home/apt | 80.00 | 3 | 3 | 162 | 120 | 2022-06-01 | 214 | 1.01 | 203 |
| 1323 | 1430881 | https://www.airbnb.com/rooms/1430881 | Fleur de Rosiers, 1BR/1BA, 3 people | 28 square meters apartment on the third floor ... | Le Marais, it's the festive, fashion, artistic... | 7642792 | https://www.airbnb.com/users/show/7642792 | Ludovic | Hôtel-de-Ville | 48.856230 | ... | Entire home/apt | 133.00 | 91 | 91 | 0 | 9 | 2017-10-17 | 196 | 0.08 | 365 |
| 1366 | 1452226 | https://www.airbnb.com/rooms/1452226 | Parfum d'Histoire, 3BR/1BA 6 people | Beautiful apartment of 62 m2 floor, with two b... | Le Marais, it's the festive, fashion, artistic... | 7642792 | https://www.airbnb.com/users/show/7642792 | Ludovic | Temple | 48.861000 | ... | Entire home/apt | 249.00 | 91 | 91 | 68 | 30 | 2017-05-01 | 196 | 0.27 | 297 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 56661 | 846322669044068092 | https://www.airbnb.com/rooms/846322669044068092 | Ternes 1br w/ elevator, nr Arc de Triomphe | Show up and start living from day one in Paris... | This neighborhood takes its name from the Plac... | 314994947 | https://www.airbnb.com/users/show/314994947 | Blueground | Batignolles-Monceau | 48.879684 | ... | Entire home/apt | 132.00 | 31 | 31 | 365 | 0 | 2023-03-26 | 447 | 0.00 | 0 |
| 56672 | 846329760591558172 | https://www.airbnb.com/rooms/846329760591558172 | Trocadero 3br w/ balcony, nr Place du Trocadéro | Show up and start living from day one in Paris... | Within the borders of the 16th, Trocadéro host... | 314994947 | https://www.airbnb.com/users/show/314994947 | Blueground | Passy | 48.869268 | ... | Entire home/apt | 312.00 | 31 | 31 | 337 | 0 | 2023-03-26 | 447 | 0.00 | 28 |
| 56673 | 846332777690107540 | https://www.airbnb.com/rooms/846332777690107540 | Trocadéro 2br w/ elevator, nr Eiffel Tower | Show up and start living from day one in Paris... | Within the borders of the 16th, Trocadéro host... | 314994947 | https://www.airbnb.com/users/show/314994947 | Blueground | Passy | 48.866772 | ... | Entire home/apt | 253.00 | 31 | 31 | 0 | 0 | 2023-03-26 | 447 | 0.00 | 365 |
| 56676 | 846333585388413037 | https://www.airbnb.com/rooms/846333585388413037 | Champs Elysées 2br w/ balcony, nr Montaigne St | Discover the best of Paris, with this two-bedr... | One of Paris’s best-loved monuments, the histo... | 314994947 | https://www.airbnb.com/users/show/314994947 | Blueground | Élysée | 48.873735 | ... | Entire home/apt | 350.00 | 31 | 31 | 0 | 0 | 2023-03-26 | 447 | 0.00 | 365 |
| 56712 | 844680838536598952 | https://www.airbnb.com/rooms/844680838536598952 | Old apartment for 4 - Paris 20 E | Travelers particularly appreciated this accomm... | The Père-Lachaise Saint Frageau district is a ... | 6053288 | https://www.airbnb.com/users/show/6053288 | Anna | Ménilmontant | 48.865593 | ... | Entire home/apt | 112.00 | 3 | 3 | 19 | 0 | 2023-03-26 | 131 | 0.00 | 346 |
2057 rows × 21 columns
highestListing = df1['host_id'].value_counts().idxmax()
highestListing
314994947
df1['neighbourhood_cleansed'].unique()
array(['Hôtel-de-Ville', 'Opéra', 'Louvre', 'Popincourt',
'Buttes-Montmartre', 'Luxembourg', 'Gobelins', 'Entrepôt',
'Batignolles-Monceau', 'Temple', 'Buttes-Chaumont', 'Bourse',
'Ménilmontant', 'Observatoire', 'Panthéon', 'Vaugirard', 'Élysée',
'Reuilly', 'Passy', 'Palais-Bourbon'], dtype=object)
df1['room_type'].unique()
array(['Entire home/apt', 'Private room', 'Shared room', 'Hotel room'],
dtype=object)
largestReview = df1.nlargest(10, 'number_of_reviews' )
largestReview
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | ... | room_type | price | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | reserved_365 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21647 | 26244787 | https://www.airbnb.com/rooms/26244787 | Double/Twin Room, close to Opera and the Louvr... | <b>The space</b><br />The Daunou Opera enjoys ... | 98020850 | https://www.airbnb.com/users/show/98020850 | Lionel | Bourse | 48.86958 | ... | Hotel room | 364.00 | 1 | 1 | 337 | 2524 | 2023-03-12 | 8 | 49.55 | 28 | |
| 14749 | 17222007 | https://www.airbnb.com/rooms/17222007 | Sweet & cosy room next to Canal Saint Martin ❤️ | The room is located right in the heart of la R... | The room I offer is right in the middle of a v... | 84901734 | https://www.airbnb.com/users/show/84901734 | Alexandra | Entrepôt | 48.86989 | ... | Private room | 147.00 | 1 | 1 | 99 | 2052 | 2023-02-27 | 5 | 28.79 | 266 |
| 31468 | 41020735 | https://www.airbnb.com/rooms/41020735 | Bed in Dorm of 8 Beds "The Big One" in Paris | Welcome to Les Piaules, our hostel & bar in do... | Les Piaules hostel is located in Paris arty & ... | 315206797 | https://www.airbnb.com/users/show/315206797 | Les Piaules | Popincourt | 48.86998 | ... | Shared room | 41.00 | 1 | 1 | 364 | 1647 | 2023-02-27 | 11 | 44.19 | 1 |
| 26941 | 35145338 | https://www.airbnb.com/rooms/35145338 | Nice Room for 2 people | Hotel de l'Aqueduc is located near the center ... | 264483496 | https://www.airbnb.com/users/show/264483496 | Hotel De L’Aqueduc | Entrepôt | 48.88137 | ... | Hotel room | 124.00 | 1 | 1 | 227 | 1433 | 2023-03-10 | 4 | 35.83 | 138 | |
| 30717 | 40194697 | https://www.airbnb.com/rooms/40194697 | Comfortable bed in shared rooms of 8 in Paris 12e | Kick your journey off with a coffee on the 8th... | Make your stay a magic, cocooning and romantic... | 309707704 | https://www.airbnb.com/users/show/309707704 | The People Paris Bercy | Reuilly | 48.83903 | ... | Shared room | 42.00 | 1 | 1 | 106 | 1420 | 2023-02-27 | 10 | 35.41 | 259 |
| 25092 | 32518543 | https://www.airbnb.com/rooms/32518543 | Hotel Boronali *** - Double room in Montmartre | Book this room, and discover the historical Pa... | Occupied since Gallo-Roman times, Montmartre h... | 98087989 | https://www.airbnb.com/users/show/98087989 | Arthur | Buttes-Montmartre | 48.88869 | ... | Hotel room | 174.00 | 1 | 1 | 326 | 1352 | 2023-03-13 | 4 | 27.54 | 39 |
| 768 | 846954 | https://www.airbnb.com/rooms/846954 | Gde chambre ds grand appart. Salle de bain privée | Chambre 20 m2 dans appartement spacieux (120 m... | Il est animé, "parisien", près du centre, très... | 3125405 | https://www.airbnb.com/users/show/3125405 | Maguy | Popincourt | 48.85416 | ... | Private room | 58.00 | 1 | 1 | 4 | 1004 | 2023-03-10 | 1 | 8.08 | 361 |
| 24946 | 32057469 | https://www.airbnb.com/rooms/32057469 | Sweet Little Room near St Martin Canal | Welcome to Hotel Tilde!<br />Prepared for you!... | In the 19th arrondissement of Paris, the hotel... | 183489963 | https://www.airbnb.com/users/show/183489963 | Naïm | Buttes-Chaumont | 48.88720 | ... | Private room | 120.00 | 1 | 1 | 324 | 986 | 2022-07-31 | 9 | 19.80 | 41 |
| 1272 | 1249964 | https://www.airbnb.com/rooms/1249964 | A Journey Into The Heart Of Paris | You are looking for a private and whole apartm... | The 1st district in which abounds most monumen... | 6811343 | https://www.airbnb.com/users/show/6811343 | Mike | Louvre | 48.86486 | ... | Private room | 149.00 | 1 | 1 | 146 | 947 | 2023-03-10 | 1 | 8.01 | 219 |
| 27199 | 35242476 | https://www.airbnb.com/rooms/35242476 | Superb room for two | In order to guarantee the room, a pre-authoriz... | The Europe Saint-Séverin Paris Notre-Dame hote... | 264941340 | https://www.airbnb.com/users/show/264941340 | Marine | Panthéon | 48.85275 | ... | Hotel room | 210.00 | 1 | 1 | 285 | 835 | 2023-02-24 | 10 | 18.24 | 80 |
10 rows × 21 columns
df1['price'] = df1['price'].str.replace(',', '')
df1['price'] = pd.to_numeric(df1['price'])
df1['annual_income'] = df1['price'] * df1['reserved_365']
df1.head(1)
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | ... | price | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | reserved_365 | annual_income | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5396 | https://www.airbnb.com/rooms/5396 | Explore the heart of old Paris | Cozy, well-appointed and graciously designed s... | You are within walking distance to the Louvre,... | 7903 | https://www.airbnb.com/users/show/7903 | Borzou | Hôtel-de-Ville | 48.85247 | ... | 124.0 | 15 | 15 | 184 | 332 | 2023-02-18 | 1 | 1.99 | 181 | 22444.0 |
1 rows × 22 columns
top20 = df1.nlargest(20, 'annual_income')
top20[['id', 'name', 'host_id', 'host_name', 'price', 'reserved_365', 'annual_income']]
| id | name | host_id | host_name | price | reserved_365 | annual_income | |
|---|---|---|---|---|---|---|---|
| 34264 | 44587494 | Citadines Tour Eiffel Paris, Eiffel Tower Studio | 342052277 | Adrien | 93000.0 | 365 | 33945000.0 |
| 25964 | 33521511 | Studio moderne 34 m2 + Balcon à Paris 15 -Javel | 10469459 | Yaniss | 64406.0 | 355 | 22864130.0 |
| 41290 | 557849127896979136 | Charming Apartment 3BR/6P - Convention/Serres -2 | 444138334 | Joffrey | 11600.0 | 365 | 4234000.0 |
| 42690 | 598345674662965219 | Charming apartment - 1BR/4P - Bercy / Accor Arena | 437852947 | Joffrey | 11600.0 | 365 | 4234000.0 |
| 43302 | 615781188493141230 | Charming apartment - 2BR/8P - Bercy / Accor Arena | 437852947 | Joffrey | 11600.0 | 365 | 4234000.0 |
| 34002 | 44218823 | Modern Furnished Apartment Near Metro and Paris | 356267449 | Tom | 10000.0 | 365 | 3650000.0 |
| 7605 | 7810360 | "Peace and Light in Heart of Paris" | 41118166 | Albert And Elizabeth | 9336.0 | 365 | 3407640.0 |
| 31923 | 41682013 | Appartement 6-7 pers. Près du canal Saint-Martin | 149823675 | Xia | 12000.0 | 278 | 3336000.0 |
| 271 | 243040 | Apart Hotel / Paris Opera Madeleine | 1275649 | Gwenael | 8000.0 | 365 | 2920000.0 |
| 8165 | 8584192 | ✨✨✨✨ Paris //Monceau✨✨✨✨ | 45177988 | Emeuric | 8000.0 | 365 | 2920000.0 |
| 46152 | 662995681250893197 | Paris : joli 2 pièces /studio duplex Bastille | 69337302 | Aurelie | 8000.0 | 365 | 2920000.0 |
| 48510 | 701870599684429320 | Cosy apartment 4 Br 8 P Opera Galeries Lafayette | 440992639 | Joffrey | 9280.0 | 306 | 2839680.0 |
| 52710 | 780399078200229165 | Luxurious Haussmannian apartment 3BR Saint-Ger... | 479175248 | Joffrey | 9280.0 | 283 | 2626240.0 |
| 20907 | 25649477 | ⭐Chic Parisian 2-rooms Apartment - Close to m... | 47201750 | Charles Et Charlotte | 7176.0 | 365 | 2619240.0 |
| 49136 | 714214985822107891 | Amazing appartement 10 P Le Bon Marché | 479175248 | Joffrey | 9280.0 | 278 | 2579840.0 |
| 15573 | 18424066 | Charmant 2 pieces 40m2 avec terrasse - 4 perso... | 28202338 | Laurent | 9000.0 | 274 | 2466000.0 |
| 6770 | 7225849 | Artistic apartment, Montmartre | 34063120 | Martin | 6742.0 | 365 | 2460830.0 |
| 35685 | 47267036 | Spacious & Cosy in Le Marais ! | 2107478 | Philippe | 6974.0 | 292 | 2036408.0 |
| 47472 | 681022718347744236 | Studio calme & chaleureux à Bastille | 425457902 | Astrid | 25836.0 | 74 | 1911864.0 |
| 45411 | 653476431703016094 | Lovely flat in St Germain !!! | 2107478 | Philippe | 7999.0 | 230 | 1839770.0 |
df1.groupby('host_id')['annual_income'].sum().sort_values(ascending=False)
host_id
33889201 90967904.0
342052277 34089835.0
10469459 22864130.0
314994947 13541778.0
50978178 11511322.0
...
334363379 0.0
65716046 0.0
30932327 0.0
8830811 0.0
13003119 0.0
Name: annual_income, Length: 42490, dtype: float64
df1.groupby('neighbourhood_cleansed')['id'].count().nlargest(10)
neighbourhood_cleansed Buttes-Montmartre 6262 Popincourt 5149 Vaugirard 4258 Entrepôt 4019 Batignolles-Monceau 3625 Buttes-Chaumont 3139 Ménilmontant 3139 Passy 3028 Opéra 2884 Temple 2633 Name: id, dtype: int64
listings_count = df1['host_id'].value_counts()
top_20_hosts = listings_count.nlargest(20)
top_20_hosts.plot(kind='bar')
plt.title('Top 20 of host having most listing')
plt.xlabel('hosts')
plt.ylabel('number of list')
plt.show()
hotel_df = df1[df1['room_type'] == 'Hotel room']
listings_count = hotel_df['host_id'].value_counts()
top_20_hosts = listings_count.nlargest(20)
top_20_hosts.plot(kind='bar')
plt.title('Top 20 hosts having most hotel')
plt.xlabel('hosts')
plt.ylabel('number of list')
plt.show()
neighbourhoodPop = df1.groupby('neighbourhood_cleansed')['id'].count()
neighbourhoodPop.sort_values(ascending=False, inplace=True)
neighbourhoodPop.plot(kind='barh')
plt.title('Top 20 Neighbourhood in Paris')
plt.xlabel('number of listing')
plt.ylabel('Neighbourhood in Paris')
plt.show()
neighbourhoodMeanPrice = df1.groupby('neighbourhood_cleansed')['price'].mean()
neighbourhoodMeanPrice.sort_values(ascending=False, inplace=True)
neighbourhoodMeanPrice
neighbourhood_cleansed Élysée 318.806575 Luxembourg 255.033254 Palais-Bourbon 253.695652 Louvre 252.700962 Passy 229.185931 Hôtel-de-Ville 223.071280 Bourse 221.398941 Temple 202.765667 Vaugirard 195.083138 Opéra 187.090846 Panthéon 186.767755 Batignolles-Monceau 152.992828 Reuilly 152.342870 Entrepôt 147.442150 Popincourt 134.015731 Observatoire 133.406731 Buttes-Chaumont 129.555272 Buttes-Montmartre 121.619451 Gobelins 119.558090 Ménilmontant 97.249442 Name: price, dtype: float64
range = [0, 84, 180, 270, 365]
df1['availability_365_range'] = pd.cut(df1['availability_365'], range, labels=['< 84 D', '90-180 D', '180-270 D', '> 270 D'])
availability_percent = df1['availability_365_range'].value_counts(normalize=True) * 100
availability_percent.sort_index().plot(kind='pie', autopct='%1.1f%%')
plt.title('Availability %')
plt.xlabel('Availability range')
plt.ylabel('Percentage (%)')
plt.show()
mostUnpopular = df1.groupby('neighbourhood_cleansed')['id'].count()
mostUnpopular.sort_values(ascending=True,inplace=True)
mostUnpopular.nsmallest(10)
neighbourhood_cleansed Louvre 1351 Palais-Bourbon 1564 Luxembourg 1684 Élysée 1825 Gobelins 1885 Hôtel-de-Ville 1922 Panthéon 1929 Bourse 2078 Observatoire 2080 Reuilly 2272 Name: id, dtype: int64
countList = df1['room_type'].value_counts()
countList.plot(kind='bar')
# Set the title and labels
plt.title('Listings by Type')
plt.xlabel(' Type')
plt.ylabel('Number of lst')
# Show the plot
plt.show()
df1['room_type'].value_counts(normalize=True) * 100
room_type Entire home/apt 85.234284 Private room 12.643232 Hotel room 1.613017 Shared room 0.509467 Name: proportion, dtype: float64
pivot_table = df1.pivot_table(index='neighbourhood_cleansed', columns='room_type', values='id', aggfunc='count')
pivot_table
| room_type | Entire home/apt | Hotel room | Private room | Shared room |
|---|---|---|---|---|
| neighbourhood_cleansed | ||||
| Batignolles-Monceau | 3080 | 56 | 474 | 15 |
| Bourse | 1876 | 23 | 174 | 5 |
| Buttes-Chaumont | 2604 | 3 | 492 | 40 |
| Buttes-Montmartre | 5548 | 58 | 637 | 19 |
| Entrepôt | 3441 | 40 | 522 | 16 |
| Gobelins | 1481 | 14 | 379 | 11 |
| Hôtel-de-Ville | 1735 | 8 | 170 | 9 |
| Louvre | 1143 | 52 | 152 | 4 |
| Luxembourg | 1369 | 86 | 227 | 2 |
| Ménilmontant | 2609 | 11 | 496 | 23 |
| Observatoire | 1671 | 33 | 364 | 12 |
| Opéra | 2282 | 92 | 504 | 6 |
| Palais-Bourbon | 1382 | 53 | 124 | 5 |
| Panthéon | 1607 | 52 | 260 | 10 |
| Passy | 2615 | 68 | 325 | 20 |
| Popincourt | 4525 | 34 | 560 | 30 |
| Reuilly | 1864 | 24 | 366 | 18 |
| Temple | 2443 | 24 | 155 | 11 |
| Vaugirard | 3598 | 71 | 561 | 28 |
| Élysée | 1477 | 113 | 230 | 5 |
pivot_table.plot(kind='bar', figsize=(20,18))
plt.title('Listings by Neighbourhood')
plt.xlabel('Neighbourhood')
plt.ylabel('Number of Listings')
# Show the plot
plt.show()
pivot_table = df1.pivot_table(index='neighbourhood_cleansed', columns='room_type', values='price', aggfunc='sum')
print(pivot_table)
room_type Entire home/apt Hotel room Private room Shared room neighbourhood_cleansed Batignolles-Monceau 459542.0 14998.0 74032.0 6027.0 Bourse 404053.0 11034.0 41949.0 3031.0 Buttes-Chaumont 357654.0 1305.0 42788.0 4927.0 Buttes-Montmartre 672983.0 13243.0 73915.0 1440.0 Entrepôt 525677.0 14902.0 50977.0 1014.0 Gobelins 190692.0 4165.0 30012.0 498.0 Hôtel-de-Ville 394710.0 4880.0 27840.0 1313.0 Louvre 287805.0 17339.0 36043.0 212.0 Luxembourg 354243.0 30922.0 44097.0 214.0 Ménilmontant 267686.0 4696.0 31760.0 1124.0 Observatoire 223662.0 7755.0 45354.0 715.0 Opéra 404375.0 29163.0 105701.0 331.0 Palais-Bourbon 348191.0 18417.0 29913.0 259.0 Panthéon 300204.0 22061.0 37415.0 595.0 Passy 625190.0 23242.0 43392.0 2151.0 Popincourt 630516.0 7644.0 50125.0 1762.0 Reuilly 298201.0 4647.0 42488.0 787.0 Temple 497362.0 7687.0 27607.0 1226.0 Vaugirard 740321.0 16581.0 72360.0 1402.0 Élysée 462073.0 45129.0 74277.0 343.0
plt.scatter(pivot_table.index, pivot_table['Private room'])
plt.title('Mean Price by neighbourhood')
plt.xlabel('neighbourhood')
plt.ylabel('Price')
plt.xticks(rotation = 90)
plt.show()
louvreLow500 = df1[(df1['neighbourhood_cleansed'] == 'Louvre') & (df1['price'] < 500 )]
plt.boxplot(louvreLow500['price'])
plt.title('Prices Louvre Price < 500')
plt.ylabel('price')
# Show the plot
plt.show()
fig = px.scatter(df1, x='longitude', y='latitude', color='neighbourhood_cleansed', hover_data=['price', 'room_type'])
fig.update_layout(width=1000, height=600)
fig.show()
top100review = df1.nlargest(100, 'number_of_reviews')
top100review
| id | listing_url | name | description | neighborhood_overview | host_id | host_url | host_name | neighbourhood_cleansed | latitude | ... | minimum_nights | minimum_nights | availability_365 | number_of_reviews | last_review | calculated_host_listings_count | reviews_per_month | reserved_365 | annual_income | availability_365_range | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21647 | 26244787 | https://www.airbnb.com/rooms/26244787 | Double/Twin Room, close to Opera and the Louvr... | <b>The space</b><br />The Daunou Opera enjoys ... | 98020850 | https://www.airbnb.com/users/show/98020850 | Lionel | Bourse | 48.86958 | ... | 1 | 1 | 337 | 2524 | 2023-03-12 | 8 | 49.55 | 28 | 10192.0 | > 270 D | |
| 14749 | 17222007 | https://www.airbnb.com/rooms/17222007 | Sweet & cosy room next to Canal Saint Martin ❤️ | The room is located right in the heart of la R... | The room I offer is right in the middle of a v... | 84901734 | https://www.airbnb.com/users/show/84901734 | Alexandra | Entrepôt | 48.86989 | ... | 1 | 1 | 99 | 2052 | 2023-02-27 | 5 | 28.79 | 266 | 39102.0 | 90-180 D |
| 31468 | 41020735 | https://www.airbnb.com/rooms/41020735 | Bed in Dorm of 8 Beds "The Big One" in Paris | Welcome to Les Piaules, our hostel & bar in do... | Les Piaules hostel is located in Paris arty & ... | 315206797 | https://www.airbnb.com/users/show/315206797 | Les Piaules | Popincourt | 48.86998 | ... | 1 | 1 | 364 | 1647 | 2023-02-27 | 11 | 44.19 | 1 | 41.0 | > 270 D |
| 26941 | 35145338 | https://www.airbnb.com/rooms/35145338 | Nice Room for 2 people | Hotel de l'Aqueduc is located near the center ... | 264483496 | https://www.airbnb.com/users/show/264483496 | Hotel De L’Aqueduc | Entrepôt | 48.88137 | ... | 1 | 1 | 227 | 1433 | 2023-03-10 | 4 | 35.83 | 138 | 17112.0 | 180-270 D | |
| 30717 | 40194697 | https://www.airbnb.com/rooms/40194697 | Comfortable bed in shared rooms of 8 in Paris 12e | Kick your journey off with a coffee on the 8th... | Make your stay a magic, cocooning and romantic... | 309707704 | https://www.airbnb.com/users/show/309707704 | The People Paris Bercy | Reuilly | 48.83903 | ... | 1 | 1 | 106 | 1420 | 2023-02-27 | 10 | 35.41 | 259 | 10878.0 | 90-180 D |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9744 | 10948338 | https://www.airbnb.com/rooms/10948338 | Paris je t’aime ,Montmartre village | Joli appartement rénové de 3 pièces dans le qu... | L'appartement est situé au cœur du village Mon... | 50631208 | https://www.airbnb.com/users/show/50631208 | Laurent | Buttes-Montmartre | 48.88723 | ... | 3 | 3 | 254 | 462 | 2023-02-25 | 4 | 5.34 | 111 | 16650.0 | 180-270 D |
| 2062 | 2464417 | https://www.airbnb.com/rooms/2464417 | PARIS 1er CENTER - MUSEE DU LOUVRE | This charming Parisian flat, recently renovate... | A few hundred meters from the Opera and the Lo... | 6851238 | https://www.airbnb.com/users/show/6851238 | Victoria | Louvre | 48.86513 | ... | 1 | 1 | 94 | 461 | 2023-02-27 | 4 | 4.27 | 271 | 66395.0 | 90-180 D |
| 9583 | 10614603 | https://www.airbnb.com/rooms/10614603 | Studio in Montmartre | Cozy studio, stunning views over the rooftops ... | The area is very close to the Sacre Coeur and ... | 54776077 | https://www.airbnb.com/users/show/54776077 | Vladimir | Buttes-Montmartre | 48.88688 | ... | 1 | 1 | 166 | 459 | 2023-03-10 | 1 | 5.29 | 199 | 12935.0 | 90-180 D |
| 18457 | 21780368 | https://www.airbnb.com/rooms/21780368 | Beautiful Family Apartment in central Paris | (Attention : Voisins sensibles au bruit. Fêtes... | 9304128 | https://www.airbnb.com/users/show/9304128 | Louise | Élysée | 48.87131 | ... | 1 | 1 | 135 | 459 | 2023-03-11 | 10 | 7.09 | 230 | 115000.0 | 90-180 D | |
| 2080 | 2302860 | https://www.airbnb.com/rooms/2302860 | Appartement cosy Marais - annulation flexible | Cute 2 rooms in the Marais, of 28 m2 / 300 Sq ... | In the heart of the Marais in a very commercia... | 8994534 | https://www.airbnb.com/users/show/8994534 | Nicolas | Hôtel-de-Ville | 48.85638 | ... | 2 | 2 | 25 | 456 | 2023-02-26 | 1 | 4.57 | 340 | 39440.0 | < 84 D |
100 rows × 23 columns
top100review['price'].mean()
143.41
top100review.groupby('room_type')['price'].mean()
room_type Entire home/apt 157.304348 Hotel room 204.666667 Private room 110.371429 Shared room 43.000000 Name: price, dtype: float64
connection = psycopg2.connect(user="postgres",
password=passwd,
host="127.0.0.1",
port="5432",
database="AirBnB_Database")
try:
#execution
cursor = connection.cursor()
#valider la transaction
print('connection succes')
except(Exception) as erreur:
print(erreur)
connection succes
requete='CREATE TABLE airbnb_paris_listings_m2i (id_k SERIAL PRIMARY KEY NOT NULL)'
try:
cursor.execute(requete)
connection.commit()
print('table cerated with succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
table cerated with succes
dict_type={'int64':'decimal', 'object':'text', 'float64':'real', 'datetime64[ns]':'date', 'category' : 'text'}
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 56726 entries, 0 to 56725 Data columns (total 23 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 56726 non-null int64 1 listing_url 56726 non-null object 2 name 56726 non-null object 3 description 56726 non-null object 4 neighborhood_overview 56726 non-null object 5 host_id 56726 non-null int64 6 host_url 56726 non-null object 7 host_name 56726 non-null object 8 neighbourhood_cleansed 56726 non-null object 9 latitude 56726 non-null float64 10 longitude 56726 non-null float64 11 room_type 56726 non-null object 12 price 56726 non-null float64 13 minimum_nights 56726 non-null int64 14 minimum_nights 56726 non-null int64 15 availability_365 56726 non-null int64 16 number_of_reviews 56726 non-null int64 17 last_review 56726 non-null datetime64[ns] 18 calculated_host_listings_count 56726 non-null int64 19 reviews_per_month 56726 non-null float64 20 reserved_365 56726 non-null int64 21 annual_income 56726 non-null float64 22 availability_365_range 33706 non-null category dtypes: category(1), datetime64[ns](1), float64(5), int64(8), object(8) memory usage: 9.6+ MB
df1 = df1.loc[:,~df1.columns.duplicated()]
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 56726 entries, 0 to 56725 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 56726 non-null int64 1 listing_url 56726 non-null object 2 name 56726 non-null object 3 description 56726 non-null object 4 neighborhood_overview 56726 non-null object 5 host_id 56726 non-null int64 6 host_url 56726 non-null object 7 host_name 56726 non-null object 8 neighbourhood_cleansed 56726 non-null object 9 latitude 56726 non-null float64 10 longitude 56726 non-null float64 11 room_type 56726 non-null object 12 price 56726 non-null float64 13 minimum_nights 56726 non-null int64 14 availability_365 56726 non-null int64 15 number_of_reviews 56726 non-null int64 16 last_review 56726 non-null datetime64[ns] 17 calculated_host_listings_count 56726 non-null int64 18 reviews_per_month 56726 non-null float64 19 reserved_365 56726 non-null int64 20 annual_income 56726 non-null float64 21 availability_365_range 33706 non-null category dtypes: category(1), datetime64[ns](1), float64(5), int64(7), object(8) memory usage: 9.1+ MB
table_name = 'airbnb_paris_listings_m2i'
request = 'alter table ' + table_name
colList = []
# map new type with a dictionary cause the function dtype don't work on object
for type in dict_type:
for select in df1.select_dtypes(include= type).columns:
if select not in request :
request += ' add column ' + select+ " " + dict_type[type] + ','
colList.append(select)
request = (request[:-1] + ";")
try:
cursor.execute(request)
connection.commit()
print('alter table succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
alter table succes
# set the column in thr right order
df1 = df1[colList]
tupled_data = [tuple(x) for x in df1.to_numpy()]
request = f"INSERT INTO airbnb_paris_listings_m2i ({', '.join(colList)}) VALUES %s"
try:
extras.execute_values(cursor, request, tupled_data)
connection.commit()
print('data insert with succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
data insert with succes
data = pd.read_csv('reviews.csv')
df2 = data.copy()
df2.head(10)
| listing_id | id | date | reviewer_id | reviewer_name | comments | |
|---|---|---|---|---|---|---|
| 0 | 5396 | 4824 | 2009-06-30 | 19995 | Sarah | Perfect location!! Nasrine was a delight and m... |
| 1 | 5396 | 4968 | 2009-07-03 | 20117 | Chris | This is a nice place in a great location in Pa... |
| 2 | 5396 | 5240 | 2009-07-08 | 22190 | Annelaure | Nice studio, very clean, very quiet, in a grea... |
| 3 | 5396 | 9619 | 2009-09-10 | 11947 | Jean | Superb location, great studio. \r<br/>A slice ... |
| 4 | 5396 | 18970 | 2009-12-02 | 40625 | Bette | Perfect place to be in Paris, walking home eve... |
| 5 | 5396 | 25574 | 2010-02-07 | 50781 | Amy | Wonderful hosts - very accommodating, friendly... |
| 6 | 5396 | 38642 | 2010-04-28 | 106430 | Amy | Best location in Paris - very privy location a... |
| 7 | 5396 | 46432 | 2010-05-26 | 46034 | Suzanne | Just a short walk from several Metro stations,... |
| 8 | 5396 | 53866 | 2010-06-17 | 69258 | Charles | I can not imagine a better location to stay in... |
| 9 | 5396 | 92977 | 2010-09-08 | 36827 | Martin | I always stay on the Ile Saint Louis when I am... |
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1406845 entries, 0 to 1406844 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 listing_id 1406845 non-null int64 1 id 1406845 non-null int64 2 date 1406845 non-null object 3 reviewer_id 1406845 non-null int64 4 reviewer_name 1406844 non-null object 5 comments 1406749 non-null object dtypes: int64(3), object(3) memory usage: 64.4+ MB
df2['comments'] = df2['comments'].str.lower()
df2.head(10)
| listing_id | id | date | reviewer_id | reviewer_name | comments | |
|---|---|---|---|---|---|---|
| 0 | 5396 | 4824 | 2009-06-30 | 19995 | Sarah | perfect location!! nasrine was a delight and m... |
| 1 | 5396 | 4968 | 2009-07-03 | 20117 | Chris | this is a nice place in a great location in pa... |
| 2 | 5396 | 5240 | 2009-07-08 | 22190 | Annelaure | nice studio, very clean, very quiet, in a grea... |
| 3 | 5396 | 9619 | 2009-09-10 | 11947 | Jean | superb location, great studio. \r<br/>a slice ... |
| 4 | 5396 | 18970 | 2009-12-02 | 40625 | Bette | perfect place to be in paris, walking home eve... |
| 5 | 5396 | 25574 | 2010-02-07 | 50781 | Amy | wonderful hosts - very accommodating, friendly... |
| 6 | 5396 | 38642 | 2010-04-28 | 106430 | Amy | best location in paris - very privy location a... |
| 7 | 5396 | 46432 | 2010-05-26 | 46034 | Suzanne | just a short walk from several metro stations,... |
| 8 | 5396 | 53866 | 2010-06-17 | 69258 | Charles | i can not imagine a better location to stay in... |
| 9 | 5396 | 92977 | 2010-09-08 | 36827 | Martin | i always stay on the ile saint louis when i am... |
df2['date'] = pd.to_datetime(df2['date'], format='%Y-%m-%d')
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1406845 entries, 0 to 1406844 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 listing_id 1406845 non-null int64 1 id 1406845 non-null int64 2 date 1406845 non-null datetime64[ns] 3 reviewer_id 1406845 non-null int64 4 reviewer_name 1406844 non-null object 5 comments 1406749 non-null object dtypes: datetime64[ns](1), int64(3), object(2) memory usage: 64.4+ MB
request='CREATE TABLE airbnb_paris_reviews (id_k SERIAL PRIMARY KEY NOT NULL)'
try:
cursor.execute(request)
connection.commit()
print('succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
succes
dict_types={'int64':'decimal', 'object':'text', 'float64':'real', 'datetime64[ns]':'date', 'category' : 'text'}
table_name = 'airbnb_paris_reviews'
request = 'alter table ' + table_name
colList = []
for type in dict_types:
for select in df2.select_dtypes(include= type).columns:
if select not in request :
request += ' add column ' + select+ " " + dict_types[type] + ','
colList.append(select)
request = (request[:-1] + ";")
try:
cursor.execute(request)
connection.commit()
print('alter data column succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
alter data column succes
df2 = df2[colList]
tupled_data = [tuple(x) for x in df2.to_numpy()]
requete = f"INSERT INTO airbnb_paris_reviews ({', '.join(colList)}) VALUES %s"
try:
extras.execute_values(cursor, requete, tupled_data)
connection.commit()
print('insert data succes')
except(Exception) as erreur:
print(erreur)
connection.rollback()
insert data succes
requete = """
SELECT li.id, li.name, li.listing_url
FROM airbnb_paris_reviews AS re
LEFT JOIN airbnb_paris_listings_m2i AS li ON re.listing_id = li.id
GROUP BY li.id, li.name, li.listing_url
ORDER BY COUNT(re.comments) DESC
LIMIT 100
"""
try:
cursor.execute(requete)
result = cursor.fetchall()
print(result)
except(Exception) as error:
connection.rollback()
print(error)
[(Decimal('26244787'), 'Double/Twin Room, close to Opera and the Louvre with breakfast included', 'https://www.airbnb.com/rooms/26244787'), (Decimal('17222007'), 'Sweet & cosy room next to Canal Saint Martin ❤️', 'https://www.airbnb.com/rooms/17222007'), (Decimal('41020735'), 'Bed in Dorm of 8 Beds "The Big One" in Paris', 'https://www.airbnb.com/rooms/41020735'), (Decimal('35145338'), 'Nice Room for 2 people', 'https://www.airbnb.com/rooms/35145338'), (Decimal('40194697'), 'Comfortable bed in shared rooms of 8 in Paris 12e', 'https://www.airbnb.com/rooms/40194697'), (Decimal('32518543'), 'Hotel Boronali *** - Double room in Montmartre', 'https://www.airbnb.com/rooms/32518543'), (Decimal('846954'), 'Gde chambre ds grand appart. Salle de bain privée', 'https://www.airbnb.com/rooms/846954'), (Decimal('32057469'), 'Sweet Little Room near St Martin Canal', 'https://www.airbnb.com/rooms/32057469'), (Decimal('1249964'), 'A Journey Into The Heart Of Paris', 'https://www.airbnb.com/rooms/1249964'), (Decimal('35242476'), 'Superb room for two', 'https://www.airbnb.com/rooms/35242476'), (Decimal('2488829'), 'Place de la Bastille.', 'https://www.airbnb.com/rooms/2488829'), (Decimal('38891995'), 'Hotel Augustin*** Double room', 'https://www.airbnb.com/rooms/38891995'), (Decimal('35065'), 'Lovely Loft Saint-Germain des Pres', 'https://www.airbnb.com/rooms/35065'), (Decimal('8532948'), 'Comfortable bedroom+bathroom', 'https://www.airbnb.com/rooms/8532948'), (Decimal('6406706'), 'Charming guest room', 'https://www.airbnb.com/rooms/6406706'), (Decimal('5724631'), 'AMAZING VIEW OF THE EIFFEL TOWER!', 'https://www.airbnb.com/rooms/5724631'), (Decimal('3622007'), 'Art Studio on courtyard with trees', 'https://www.airbnb.com/rooms/3622007'), (Decimal('2412144'), 'Private suite 50 sqm opéra madeleine Vendôme place', 'https://www.airbnb.com/rooms/2412144'), (Decimal('5678736'), 'Studio Place de la Bastille', 'https://www.airbnb.com/rooms/5678736'), (Decimal('31868079'), 'Room & Pool', 'https://www.airbnb.com/rooms/31868079'), (Decimal('36063785'), 'Generator - 1 Bed in 8 Bed Dormitory', 'https://www.airbnb.com/rooms/36063785'), (Decimal('1070274'), 'Studio with Panoramic view of Paris', 'https://www.airbnb.com/rooms/1070274'), (Decimal('4911410'), 'OPERA - Beau LOFT Paris 17ème siècle 4 Pers+enfant', 'https://www.airbnb.com/rooms/4911410'), (Decimal('35590372'), 'Double Classic Room', 'https://www.airbnb.com/rooms/35590372'), (Decimal('8313266'), 'Marais Beaubourg Gravilliers with air conditioning', 'https://www.airbnb.com/rooms/8313266'), (Decimal('29581287'), 'Romantic room with a view on the Invalides', 'https://www.airbnb.com/rooms/29581287'), (Decimal('1250856'), 'Beautiful Room in Quartier Latin', 'https://www.airbnb.com/rooms/1250856'), (Decimal('1706112'), 'Cozy & unique apartment in a designer house', 'https://www.airbnb.com/rooms/1706112'), (Decimal('11034796'), 'Bedroom near Buttes Chaumont park', 'https://www.airbnb.com/rooms/11034796'), (Decimal('25788058'), 'A stay like a Parisian', 'https://www.airbnb.com/rooms/25788058'), (Decimal('753143'), 'A NICE LOVELY TYPICALLY FRENCH ROOM', 'https://www.airbnb.com/rooms/753143'), (Decimal('4381461'), 'Stylish apartment in Paris near Eiffel Tower for 4', 'https://www.airbnb.com/rooms/4381461'), (Decimal('3734970'), 'Located well', 'https://www.airbnb.com/rooms/3734970'), (Decimal('11710116'), 'Place des Vosges district with Air Conditioning', 'https://www.airbnb.com/rooms/11710116'), (Decimal('391189'), 'Cosy quiet room in central Paris', 'https://www.airbnb.com/rooms/391189'), (Decimal('193632'), 'Charming Studio - Heart of Marais', 'https://www.airbnb.com/rooms/193632'), (Decimal('2247629'), 'Bedroom at the foot of Montmartre', 'https://www.airbnb.com/rooms/2247629'), (Decimal('6921651'), 'Champs Elysée Near President Palace 4 Pers + Child', 'https://www.airbnb.com/rooms/6921651'), (Decimal('2173772'), 'Quiet Room with Skylight & Your Own Bathroom & WC', 'https://www.airbnb.com/rooms/2173772'), (Decimal('9194001'), '1Bedroom with AC 10min from the Eiffel Tower', 'https://www.airbnb.com/rooms/9194001'), (Decimal('780445'), 'Modern Studio Countryside in Paris', 'https://www.airbnb.com/rooms/780445'), (Decimal('35590371'), 'Single Room', 'https://www.airbnb.com/rooms/35590371'), (Decimal('18305959'), 'Warm atmosphere room in the heart of Paris', 'https://www.airbnb.com/rooms/18305959'), (Decimal('2013051'), 'Small charming studio in the Marais', 'https://www.airbnb.com/rooms/2013051'), (Decimal('17594732'), 'Le Haut Marais, rue Notre-Dame de Nazareth', 'https://www.airbnb.com/rooms/17594732'), (Decimal('41227843'), 'Superb Room for two', 'https://www.airbnb.com/rooms/41227843'), (Decimal('12736301'), 'MONTMARTRE AND STREET OF THE MARTYRS - LOFT 4/7 P', 'https://www.airbnb.com/rooms/12736301'), (Decimal('7074843'), 'ideal pour 2 face a la Tour Eiffel.', 'https://www.airbnb.com/rooms/7074843'), (Decimal('2343894'), 'Large studio with Balcony - NEW!!', 'https://www.airbnb.com/rooms/2343894'), (Decimal('4894506'), 'Spacious & Quiet Bedroom w/ Garden', 'https://www.airbnb.com/rooms/4894506'), (Decimal('192162'), '5th Heaven: Quartier Latin Studio', 'https://www.airbnb.com/rooms/192162'), (Decimal('35177054'), 'Superb Room for two', 'https://www.airbnb.com/rooms/35177054'), (Decimal('9825542'), 'Charm in Heart of Paris Vaugirard', 'https://www.airbnb.com/rooms/9825542'), (Decimal('1369349'), 'Splendid terrace magnificent view', 'https://www.airbnb.com/rooms/1369349'), (Decimal('38748871'), 'Hotel Acadia*** Double room', 'https://www.airbnb.com/rooms/38748871'), (Decimal('4022383'), 'AN AMAZING STUDIO WITH GARDEN', 'https://www.airbnb.com/rooms/4022383'), (Decimal('6754497'), 'Nice room Père Lachaise/ Bagnolet', 'https://www.airbnb.com/rooms/6754497'), (Decimal('371299'), 'New Apartment in Marais rue des Archives', 'https://www.airbnb.com/rooms/371299'), (Decimal('2150160'), 'Charming room in a charming flat', 'https://www.airbnb.com/rooms/2150160'), (Decimal('17186194'), 'Beautiful family apartment in the center of Paris', 'https://www.airbnb.com/rooms/17186194'), (Decimal('13365586'), 'LE MARAIS/LA SEINE ARTIST STUDIO', 'https://www.airbnb.com/rooms/13365586'), (Decimal('35157584'), 'Superb Room for two', 'https://www.airbnb.com/rooms/35157584'), (Decimal('3371766'), 'Cosy Apartment in Rue des Rosiers', 'https://www.airbnb.com/rooms/3371766'), (Decimal('4973468'), 'Cosy Room Paris Center in the Marais', 'https://www.airbnb.com/rooms/4973468'), (Decimal('15488105'), "Nice Bedroom is Paris' 15th Arrondissement", 'https://www.airbnb.com/rooms/15488105'), (Decimal('252525'), 'charming bedroom near metro OPERA', 'https://www.airbnb.com/rooms/252525'), (Decimal('13074447'), 'Triplex in Le Marais - "Bordeaux" idéal pour 5', 'https://www.airbnb.com/rooms/13074447'), (Decimal('12892570'), 'Marais beautiful loft', 'https://www.airbnb.com/rooms/12892570'), (Decimal('17917735'), 'Beautiful room & Hammam in a Hotel & Coworking', 'https://www.airbnb.com/rooms/17917735'), (Decimal('9446428'), 'Lovely Studio Pigalle/Montmartre', 'https://www.airbnb.com/rooms/9446428'), (Decimal('961308'), 'LUXURIOUS & COSY PARISIAN FLAT', 'https://www.airbnb.com/rooms/961308'), (Decimal('1587440'), 'Nice appartment on an island (bed & breakfast)', 'https://www.airbnb.com/rooms/1587440'), (Decimal('3694130'), 'Adorable studio in the heart of Montmartre', 'https://www.airbnb.com/rooms/3694130'), (Decimal('343648'), 'Studio la Garçonniere Paris-Marais', 'https://www.airbnb.com/rooms/343648'), (Decimal('729422'), 'Minimalist Industrial Style Loft, Center of Paris', 'https://www.airbnb.com/rooms/729422'), (Decimal('21810459'), 'Arty Room', 'https://www.airbnb.com/rooms/21810459'), (Decimal('29331544'), 'Hôtel Boronali *** - Room w/ Balcony in Montmartre', 'https://www.airbnb.com/rooms/29331544'), (Decimal('38929355'), 'Hotel Monterosa *** Standard Double room', 'https://www.airbnb.com/rooms/38929355'), (Decimal('1525865'), 'STAY IN THE HEART OF PARIS !', 'https://www.airbnb.com/rooms/1525865'), (Decimal('878992'), 'Studio Saint Paul Le Marais/Paris4e', 'https://www.airbnb.com/rooms/878992'), (Decimal('38900228'), 'Hotel 34B*** Standard Twin room', 'https://www.airbnb.com/rooms/38900228'), (Decimal('6111649'), 'Theatrical studio in Haut Marais', 'https://www.airbnb.com/rooms/6111649'), (Decimal('2005815'), 'cozy full renewed 2 Bdr Paris heart', 'https://www.airbnb.com/rooms/2005815'), (Decimal('13724244'), 'LOUVRE 3: UPPER CLASS SUITE - RUE SAINT HONORE', 'https://www.airbnb.com/rooms/13724244'), (Decimal('4772732'), '#CuttleStudio @ Paris Le Marais', 'https://www.airbnb.com/rooms/4772732'), (Decimal('4035620'), "Old caretaker's lodge - Porte de Charenton", 'https://www.airbnb.com/rooms/4035620'), (Decimal('314288'), 'Bed & breakfast Paris Gare de Lyon', 'https://www.airbnb.com/rooms/314288'), (Decimal('12537003'), 'Wonderful Cozy home in Paris, Canal str', 'https://www.airbnb.com/rooms/12537003'), (Decimal('38929029'), 'Hotel Palm*** Standard Double room', 'https://www.airbnb.com/rooms/38929029'), (Decimal('38899977'), 'Hotel 34B*** Standard Double room', 'https://www.airbnb.com/rooms/38899977'), (Decimal('566511'), 'SOFA bed in the heart of paris', 'https://www.airbnb.com/rooms/566511'), (Decimal('9094454'), 'A charming studio in the heart of Montmartre', 'https://www.airbnb.com/rooms/9094454'), (Decimal('2444138'), 'Downtown Paris 5 people duplex style', 'https://www.airbnb.com/rooms/2444138'), (Decimal('7725395'), 'Nice and private room in the center of Paris', 'https://www.airbnb.com/rooms/7725395'), (Decimal('10948338'), 'Paris je t’aime ,Montmartre village', 'https://www.airbnb.com/rooms/10948338'), (Decimal('579530'), 'AMAZING SUNNY studio PARIS 14!', 'https://www.airbnb.com/rooms/579530'), (Decimal('2464417'), 'PARIS 1er CENTER - MUSEE DU LOUVRE', 'https://www.airbnb.com/rooms/2464417'), (Decimal('10614603'), 'Studio in Montmartre', 'https://www.airbnb.com/rooms/10614603'), (Decimal('21780368'), 'Beautiful Family Apartment in central Paris', 'https://www.airbnb.com/rooms/21780368'), (Decimal('2302860'), 'Appartement cosy Marais - annulation flexible', 'https://www.airbnb.com/rooms/2302860')]